This patch introduces a Xen compile-time option, xen_ia64_tlbflush_clock=y.
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
xen_ia64_pervcpu_vhpt ?= y
xen_ia64_tlb_track ?= y
xen_ia64_tlb_track_cnt ?= n
+xen_ia64_tlbflush_clock ?= y
ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
ifeq ($(xen_ia64_tlb_track_cnt),y)
CFLAGS += -DCONFIG_TLB_TRACK_CNT
endif
+ifeq ($(xen_ia64_tlbflush_clock),y)
+CFLAGS += -DCONFIG_XEN_IA64_TLBFLUSH_CLOCK
+endif
ifeq ($(no_warns),y)
CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized
endif
local_flush_tlb_all (void)
{
unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
-
+#ifdef XEN
+ /* increment flush clock before mTLB flush */
+ u32 flush_time = tlbflush_clock_inc_and_return();
+#endif
addr = local_cpu_data->ptce_base;
count0 = local_cpu_data->ptce_count[0];
count1 = local_cpu_data->ptce_count[1];
}
local_irq_restore(flags);
ia64_srlz_i(); /* srlz.i implies srlz.d */
+#ifdef XEN
+ /* update after mTLB flush. */
+ tlbflush_update_time(&__get_cpu_var(tlbflush_time), flush_time);
+#endif
}
EXPORT_SYMBOL(local_flush_tlb_all);
obj-$(crash_debug) += gdbstub.o
obj-$(xen_ia64_tlb_track) += tlb_track.o
+obj-$(xen_ia64_tlbflush_clock) += flushtlb.o
ia64_set_pta(VHPT_SIZE_LOG2 << 2);
}
-static void flush_vtlb_for_context_switch(struct vcpu* vcpu)
+static void flush_vtlb_for_context_switch(struct vcpu* prev, struct vcpu* next)
{
int cpu = smp_processor_id();
- int last_vcpu_id = vcpu->domain->arch.last_vcpu[cpu].vcpu_id;
- int last_processor = vcpu->arch.last_processor;
+ int last_vcpu_id, last_processor;
- if (is_idle_domain(vcpu->domain))
+ if (!is_idle_domain(prev->domain))
+ tlbflush_update_time
+ (&prev->domain->arch.last_vcpu[cpu].tlbflush_timestamp,
+ tlbflush_current_time());
+
+ if (is_idle_domain(next->domain))
return;
-
- vcpu->domain->arch.last_vcpu[cpu].vcpu_id = vcpu->vcpu_id;
- vcpu->arch.last_processor = cpu;
- if ((last_vcpu_id != vcpu->vcpu_id &&
+ last_vcpu_id = next->domain->arch.last_vcpu[cpu].vcpu_id;
+ last_processor = next->arch.last_processor;
+
+ next->domain->arch.last_vcpu[cpu].vcpu_id = next->vcpu_id;
+ next->arch.last_processor = cpu;
+
+ if ((last_vcpu_id != next->vcpu_id &&
last_vcpu_id != INVALID_VCPU_ID) ||
- (last_vcpu_id == vcpu->vcpu_id &&
+ (last_vcpu_id == next->vcpu_id &&
last_processor != cpu &&
last_processor != INVALID_PROCESSOR)) {
+#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
+ u32 last_tlbflush_timestamp =
+ next->domain->arch.last_vcpu[cpu].tlbflush_timestamp;
+#endif
+ int vhpt_is_flushed = 0;
// if the vTLB implementation was changed,
// the followings must be updated either.
- if (VMX_DOMAIN(vcpu)) {
+ if (VMX_DOMAIN(next)) {
// currently vTLB for vt-i domian is per vcpu.
// so any flushing isn't needed.
- } else if (HAS_PERVCPU_VHPT(vcpu->domain)) {
+ } else if (HAS_PERVCPU_VHPT(next->domain)) {
// nothing to do
} else {
- local_vhpt_flush();
+ if (NEED_FLUSH(__get_cpu_var(vhpt_tlbflush_timestamp),
+ last_tlbflush_timestamp)) {
+ local_vhpt_flush();
+ vhpt_is_flushed = 1;
+ }
+ }
+ if (vhpt_is_flushed || NEED_FLUSH(__get_cpu_var(tlbflush_time),
+ last_tlbflush_timestamp)) {
+ local_flush_tlb_all();
+ perfc_incrc(tlbflush_clock_cswitch_purge);
+ } else {
+ perfc_incrc(tlbflush_clock_cswitch_skip);
}
- local_flush_tlb_all();
perfc_incrc(flush_vtlb_for_context_switch);
}
}
(current->domain->arch.shared_info_va + XSI_PSR_IC_OFS);
migrate_timer(¤t->arch.hlt_timer, current->processor);
}
- flush_vtlb_for_context_switch(current);
+ flush_vtlb_for_context_switch(prev, current);
}
void context_switch(struct vcpu *prev, struct vcpu *next)
}
}
local_irq_restore(spsr);
- flush_vtlb_for_context_switch(current);
+ flush_vtlb_for_context_switch(prev, current);
context_saved(prev);
}
for (i = 0; i < (1 << order); i++)
share_xen_page_with_guest(virt_to_page(v->arch.privregs) +
i, d, XENSHARE_writable);
+
+ tlbflush_update_time(&v->arch.tlbflush_timestamp,
+ tlbflush_current_time());
}
v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
--- /dev/null
+/******************************************************************************
+ * flushtlb.c
+ * based on x86 flushtlb.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <xen/sched.h>
+#include <xen/softirq.h>
+#include <asm/vcpu.h>
+#include <asm/vhpt.h>
+#include <asm/flushtlb.h>
+
+/* Debug builds: Wrap frequently to stress-test the wrap logic. */
+#ifdef NDEBUG
+#define WRAP_MASK (0xFFFFFFFFU)
+#else
+#define WRAP_MASK (0x000003FFU)
+#endif
+
+volatile u32 tlbflush_clock = 1U; /* 1 greater than tlbflush_time. */
+DEFINE_PER_CPU(volatile u32, tlbflush_time);
+
+/*
+ * Atomically advance the global tlbflush clock and return the new value.
+ * The clock wraps at WRAP_MASK + 1; the CPU whose increment wraps the
+ * clock to 0 raises the softirq that leads a global TLB shootdown
+ * (new_tlbflush_clock_period()).  While the clock reads 0, other CPUs
+ * return 0 without incrementing: a shootdown is already in progress.
+ */
+u32
+tlbflush_clock_inc_and_return(void)
+{
+    u32 t, t1, t2;
+
+    t = tlbflush_clock;
+    do {
+        t1 = t2 = t;
+        /* Clock wrapped: someone else is leading a global TLB shootdown. */
+        if (unlikely(t1 == 0))
+            return t2;
+        t2 = (t + 1) & WRAP_MASK;
+        /* cmpxchg returns the observed value; retry if another CPU raced us. */
+        t = ia64_cmpxchg(acq, &tlbflush_clock, t1, t2, sizeof(tlbflush_clock));
+    } while (unlikely(t != t1));
+
+    /* Clock wrapped: we will lead a global TLB shootdown. */
+    if (unlikely(t2 == 0))
+        raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
+
+    return t2;
+}
+
+/*
+ * Softirq handler run by the CPU whose increment wrapped tlbflush_clock
+ * to 0: flush every vTLB/VHPT in the system and the machine TLBs, then
+ * restart the clock so normal operation can resume.
+ */
+void
+new_tlbflush_clock_period(void)
+{
+    /*
+     * XXX TODO
+     * If flushing all vcpus' VHPTs takes too long, it could be done in the
+     * background.  In that case the tlbflush time comparison would use only
+     * 31 bits, similar to the Linux jiffies comparison, and the VHPTs would
+     * be flushed gradually before the 31 bits wrap.
+     *
+     * Sample calculation.
+     * Currently Xen/IA64 can create up to 64 domains at the same time.
+     * VHPT size is currently 64KB. (This might be changed later though.)
+     * Suppose each domain has 4 vcpus (or 16 vcpus);
+     * then the memory size which must be flushed is 16MB (64MB).
+     */
+    struct domain* d;
+    struct vcpu* v;
+    /* Flush the vTLB and VHPT of every vcpu of every existing domain. */
+    read_lock(&domlist_lock);
+    for_each_domain(d) {
+        for_each_vcpu(d, v) {
+            /* First purge each vcpu's cached dtlb/itlb TR entries. */
+            vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+            vcpu_purge_tr_entry(&PSCBX(v,itlb));
+        }
+    }
+    smp_mb();
+    for_each_domain(d) {
+        for_each_vcpu(d, v) {
+            if (HAS_PERVCPU_VHPT(v->domain))
+                vcpu_vhpt_flush(v);
+        }
+    }
+    read_unlock(&domlist_lock);
+    /* unlock has release semantics */
+
+    /* flush all vhpt of physical cpu and mTLB */
+    on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+
+    /*
+     * The global TLB shootdown is finished, so restart the clock.  An
+     * atomic operation isn't necessary: tlbflush_clock stays 0 until this
+     * store (other CPUs see 0 and do not increment — see
+     * tlbflush_clock_inc_and_return()).
+     */
+    tlbflush_clock++;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
DEFINE_PER_CPU (unsigned long, vhpt_paddr);
DEFINE_PER_CPU (unsigned long, vhpt_pend);
+#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
+DEFINE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
+#endif
static void
__vhpt_flush(unsigned long vhpt_maddr)
void
local_vhpt_flush(void)
{
+ /* increment flush clock before flush */
+ u32 flush_time = tlbflush_clock_inc_and_return();
__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
+ /* this must be after flush */
+ tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
+ flush_time);
perfc_incrc(local_vhpt_flush);
}
-static void
+void
vcpu_vhpt_flush(struct vcpu* v)
{
+ /* increment flush clock before flush */
+ u32 flush_time = tlbflush_clock_inc_and_return();
__vhpt_flush(vcpu_vhpt_maddr(v));
+ /* this must be after flush */
+ tlbflush_update_time(&v->arch.tlbflush_timestamp, flush_time);
perfc_incrc(vcpu_vhpt_flush);
}
struct last_vcpu {
#define INVALID_VCPU_ID INT_MAX
int vcpu_id;
+#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
+ u32 tlbflush_timestamp;
+#endif
} ____cacheline_aligned_in_smp;
/* These are data in domain memory for SAL emulator. */
struct page_info* vhpt_page;
unsigned long vhpt_entries;
#endif
-
+#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
+ u32 tlbflush_timestamp;
+#endif
#define INVALID_PROCESSOR INT_MAX
int last_processor;
};
--- /dev/null
+/******************************************************************************
+ * flushtlb.h
+ * based on x86 flushtlb.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __ASM_FLUSHTLB_H__
+#define __ASM_FLUSHTLB_H__
+
+#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
+
+#include <xen/percpu.h>
+
+extern volatile u32 tlbflush_clock;
+#define tlbflush_current_time() tlbflush_clock
+
+u32 tlbflush_clock_inc_and_return(void);
+
+/* Record @timestamp as the last-flush time of the object owning @time. */
+static inline void
+tlbflush_update_time(volatile u32* time, u32 timestamp)
+{
+    /*
+     * Ideally this would be ld4.rel + st4.acq, but the plain store has only
+     * release semantics, so this function must not be relied upon as a
+     * memory barrier.
+     */
+    *time = timestamp;
+}
+
+/*
+ * taken from x86's NEED_FLUSH()
+ * obj_stamp: mTLB time stamp, per pcpu VHPT stamp, per vcpu VHPT stamp.
+ */
+static inline int
+NEED_FLUSH(u32 obj_stamp, u32 lastuse_stamp)
+{
+ u32 curr_time = tlbflush_current_time();
+ /*
+ * Two cases:
+ * 1. During a wrap, the clock ticks over to 0 while CPUs catch up. For
+ * safety during this period, we force a flush if @curr_time == 0.
+ * 2. Otherwise, we look to see if @cpu_stamp <= @lastuse_stamp.
+ * To detect false positives because @cpu_stamp has wrapped, we
+ * also check @curr_time. If less than @lastuse_stamp we definitely
+ * wrapped, so there's no need for a flush (one is forced every wrap).
+ */
+ return ((curr_time == 0) ||
+ ((obj_stamp <= lastuse_stamp) && (lastuse_stamp <= curr_time)));
+}
+
+DECLARE_PER_CPU(volatile u32, tlbflush_time);
+DECLARE_PER_CPU(volatile u32, vhpt_tlbflush_timestamp);
+
+#else
+
+#define tlbflush_current_time() (0)
+#define tlbflush_clock_inc_and_return() (0)
+#define tlbflush_update_time(time, timestamp) do {(void)timestamp;} while (0)
+#define NEED_FLUSH(obj_stamp, lastuse_stamp) (1)
+
+#endif /* CONFIG_XEN_IA64_TLBFLUSH_CLOCK */
+
+#endif /* __ASM_FLUSHTLB_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
#include <asm/processor.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
+#include <asm/flushtlb.h>
#include <asm/io.h>
#include <public/xen.h>
PERFCOUNTER_CPU(tlb_track_use_rr7, "tlb_track_use_rr7")
PERFCOUNTER_CPU(tlb_track_swap_rr0, "tlb_track_swap_rr0")
#endif
+
+// tlb flush clock
+#ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
+PERFCOUNTER_CPU(tlbflush_clock_cswitch_purge, "tlbflush_clock_cswitch_purge")
+PERFCOUNTER_CPU(tlbflush_clock_cswitch_skip, "tlbflush_clock_cswitch_skip")
+#endif
cpumask_t pcpu_dirty_mask;
vcpumask_t vcpu_dirty_mask;
- // tlbflush_timestamp;
#ifdef CONFIG_TLB_TRACK_CNT
#define TLB_TRACK_CNT_FORCE_MANY 256 /* XXX how many? */
#define tlb_track_entry_printf(entry) \
__tlb_track_entry_printf(__func__, __LINE__, (entry))
#else
-
-#define tlb_track_create(d) (0)
-#define tlb_track_destroy(d) do { } while (0)
-#define vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry) do { } while (0)
-
+// defined as no-ops when CONFIG_XEN_IA64_TLB_TRACK is disabled
+#define tlb_track_create(d) do { } while (0)
+#define tlb_track_destroy(d) do { } while (0)
+#define tlb_track_free_entry(tlb_track, entry) do { } while (0)
+#define vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry) \
+ do { } while (0)
+#define tlb_track_search_and_remove(tlb_track, ptep, old_pte, entryp) \
+ do { } while (0)
+#define tlb_track_entry_printf(entry) do { } while (0)
#endif /* CONFIG_XEN_IA64_TLB_TRACK */
#endif /* __TLB_TRACK_H__ */
/* Flush local machine TLB. */
void local_flush_tlb_all (void);
-#define tlbflush_current_time() 0
#define tlbflush_filter(x,y) ((void)0)
#endif
extern void vhpt_insert (unsigned long vadr, unsigned long pte,
unsigned long logps);
void local_vhpt_flush(void);
+extern void vcpu_vhpt_flush(struct vcpu* v);
/* Currently the VHPT is allocated per CPU. */
DECLARE_PER_CPU (unsigned long, vhpt_paddr);